Dis manibus on Latin inscriptions##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
##
## Attaching package: 'ape'
## The following objects are masked from 'package:igraph':
##
## degree, edges, mst, ring
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5 ✔ purrr 0.3.4
## ✔ tibble 3.1.6 ✔ dplyr 1.0.8
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.1 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## ✖ purrr::compose() masks igraph::compose()
## ✖ tidyr::crossing() masks igraph::crossing()
## ✖ tidyr::extract() masks stringdist::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::groups() masks igraph::groups()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::simplify() masks igraph::simplify()
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
## python
## "/opt/homebrew/Caskroom/miniforge/base/bin/python"
Sources: # http://amunategui.github.io/stringdist/
# installation of packages is recommended to do via the terminal (see in your console below), pip install name-of-the-required-package
# https://vincent.doba.fr/posts/20210407_install-fiona-on-windows/ & https://geopandas.org/getting_started/install.html Issues with geopandas on windows
#to install requirements type and run in the terminal: pip install -r requirements.txt
import fiona
import geopandas as gpd
import requests
import seaborn as sns
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
from scipy.stats import trapz
import scipy
#import nltk
import json
import tempun
#import sddk
import numpy as np
import pandas as pd
pd.options.display.max_columns = 1000 # to see all columns
Loading data from Sciencedata.dk
list_json <- jsonlite::fromJSON("https://sciencedata.dk/public/b6b6afdb969d378b70929e86e58ad975/EDH_text_cleaned_2021-01-21.json")
EDH <- as_tibble(list_json)
Loading data locally
# this will work only if you have the dataset available locally
EDH <- jsonlite::fromJSON("../data/large_data/EDH_text_cleaned_2021-01-21.json")
#dir.create("../figures")
Converting the text of inscriptions to lowercase
EDH <- EDH %>%
mutate(clean_text_interpretive_word_lowercase = str_to_lower(EDH$clean_text_interpretive_word)) %>%
mutate(clean_text_conservative_lowercase = str_to_lower(EDH$clean_text_conservative))
# preparation of coordinates
EDH<- EDH %>%
separate(col = coordinates, into = c("longitude", "latitude"), sep = ",")
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 2464 rows [20,
## 34, 64, 71, 94, 149, 212, 221, 296, 311, 356, 382, 388, 390, 405, 406, 409, 412,
## 418, 421, ...].
EDH$latitude <- as.numeric(str_replace(EDH$latitude, pattern = "\\)", replacement=""))
EDH$longitude <- as.numeric(str_replace(EDH$longitude, pattern = "c\\(", replacement=""))
## Warning: NAs introduced by coercion
Typically occurs on funerary inscriptions, invocation of the underworld gods.
Canonic Versions:
D M - only in the conservative version of the text D M S - only in the conservative version Dis Manibus Diis Manibus Dis Manibus Sacrum Diis Manibus Sacrum
dis_manibus_regex_minimal <- "di{1,2}s manibus sacrum|di{1,2}s manibus"
dis_manibus_regex_maximal <- "\\bd m s\\b|\\bd m\\b|di{1,2}s manibus sacrum|di{1,2}s manibus|\\bdi{1,2} man[e|i]s\\b"
dis_manibus_int<- str_subset(EDH$clean_text_interpretive_word_lowercase, dis_manibus_regex_minimal)
dis_manibus_cons<- str_subset(EDH$clean_text_conservative_lowercase, dis_manibus_regex_maximal)
# separating them in a new attribute
EDH<- EDH %>%
mutate(form_dis_manibus_ins = str_extract(EDH$clean_text_interpretive_word_lowercase, dis_manibus_regex_minimal)) %>%
mutate(form_dis_manibus_cons = str_extract(EDH$clean_text_conservative_lowercase, dis_manibus_regex_maximal))
EDH$form_dis_manibus <- ifelse(grepl("\\w", EDH$form_dis_manibus_ins, ignore.case = T), EDH$form_dis_manibus_ins, EDH$form_dis_manibus_cons)
EDH$form_dis_manibus <- ifelse(grepl("d m s", EDH$form_dis_manibus, ignore.case = T), "dis manibus sacrum",
ifelse(grepl("d m", EDH$form_dis_manibus, ignore.case = T), "dis manibus",
EDH$form_dis_manibus))
table(EDH$form_dis_manibus)
##
## di manes dii manes diis manibus diis manibus sacrum
## 2 1 75 17
## dis manibus dis manibus sacrum
## 8712 3112
# overview
table(str_extract(EDH$clean_text_interpretive_word_lowercase, dis_manibus_regex_minimal))
##
## diis manibus diis manibus sacrum dis manibus dis manibus sacrum
## 75 17 8549 3102
table(str_extract(EDH$clean_text_conservative_lowercase, dis_manibus_regex_maximal))
##
## d m d m s di manes di manis
## 6671 2531 2 1
## dii manes diis manibus diis manibus sacrum dis manibus
## 1 54 7 250
## dis manibus sacrum
## 20
# capturing context - 5 words before the formula and 5 words after the formula
#EDH<- EDH %>%
# mutate(dis_manibus_in_context_10w = str_extract(EDH$clean_text_interpretive_word_lowercase, "(\\w*\\s){0,6}di{1,2}s manibus sacrum|di{1,2}s manibus(\\w*\\s){0,6}"))
Subsetting dataset to contain only records with formula
#non-empty dis manibus attribute
EDH_DM <- EDH %>%
filter(!is.na(form_dis_manibus))
Save the dataset locally
EDH_DMjson <- toJSON(EDH_DM, simplifyVector = TRUE)
write(EDH_DMjson, "../data/EDH_DM.json")
nrow(EDH %>%
filter(!is.na(form_dis_manibus)))/(nrow(EDH)/100)
## [1] 14.62885
EDH_DM %>%
count(type_of_inscription_clean, sort = T) %>%
mutate(ratio = round(n/(sum(n)/100),2))
epitaph_all<- length(EDH$type_of_inscription_clean[EDH$type_of_inscription_clean == "epitaph"])
epitaph_dm<- length(EDH_DM$type_of_inscription_clean[EDH_DM$type_of_inscription_clean == "epitaph"])
epitaph_dm/(epitaph_all/100)
## [1] 33.61357
form_dm<- EDH_DM %>%
#filter(type_of_inscription_clean %in% c("epitaph", "NULL")) %>%
filter(!is.na(form_dis_manibus)) %>%
count(form_dis_manibus, sort=T) %>%
mutate(ratio = round(n/(sum(n)/100),2))
form_dm
write_csv(form_dm, "../output/DM_form_total_numbers.csv")
EDM_DM_freq<- EDH_DM %>%
#count(province_label_clean, form_dis_manibus, sort=T) %>%
ggplot(aes(y=province_label_clean)) +
geom_bar(aes(fill=form_dis_manibus)) +
#coord_cartesian(xlim=c(0,1700)) +
theme_minimal() +
theme(text = element_text(size=12)) +
labs(y="Roman province", x="Formula", title= "Frequence of Dis Manibus formula by province", subtitle = ggtitle(paste("n =", nrow(EDH_DM), "inscriptions (EDH)")))
ggsave("../figures/EDM_DM_province_freq.png")
## Saving 14 x 10 in image
EDM_DM_freq
DM_form<- EDH_DM %>%
select(form_dis_manibus, province_label_clean) %>%
count(form_dis_manibus, province_label_clean, sort=T)
EDH_DM %>%
ggplot(aes(y=province_label_clean, fill=form_dis_manibus)) +
geom_bar(position="fill") +
theme_minimal() +
theme(text = element_text(size=12)) +
geom_vline(xintercept=c(0.25, 0.5, 0.75), linetype="dotted", color = "blue", size=0.8) +
labs(y="Roman province", x="Formula", title= "Ratio of Dis Manibus formulae by province", subtitle = ggtitle(paste("n =", nrow(EDH_DM), "inscriptions (EDH)")))
#geom_label(aes(label= form_dis_manibus)) +
#geom_label(aes(label = form_dis_manibus), colour = "black", fontface = "bold", hjust = -0.1)
EDH_DM$province_label_clean <-factor(EDH_DM$province_label_clean)
EDH_DM$form_dis_manibus <-factor(EDH_DM$form_dis_manibus)
EDH_DM %>%
count(form_dis_manibus, province_label_clean, sort=T) %>%
group_by(form_dis_manibus) %>%
filter(form_dis_manibus == "dis manibus sacrum") %>%
ggplot(aes(y=fct_reorder(province_label_clean, n), x=n)) +
geom_col(fill= "blue") +
labs(y="Roman province", x="n", title= "Frequence of 'Dis Manibus Sacrum' formula by province", subtitle = ggtitle(paste("n =", nrow(EDH_DM), "inscriptions (EDH)"))) +
geom_text(aes(label = n), colour = "black", size= 3, hjust = -0.1)
EDH_DM %>%
count(form_dis_manibus, province_label_clean, sort=T) %>%
group_by(form_dis_manibus) %>%
filter(form_dis_manibus == "dis manibus") %>%
ggplot(aes(y=fct_reorder(province_label_clean, n), x=n)) +
geom_col(fill= "blue") +
labs(y="Roman province", x="n", title= "Frequence of 'Dis Manibus' formula by province", subtitle = ggtitle(paste("n =", nrow(EDH_DM), "inscriptions (EDH)"))) +
geom_text(aes(label = n), colour = "black", size= 3, hjust = -0.1)
DM_form_province<- EDH_DM %>%
count(form_dis_manibus, province_label_clean, sort=T) %>%
pivot_wider(names_from = form_dis_manibus, values_from = n)
province_total_insc<- EDH %>%
count(province_label_clean)
provinces_DM<- merge(x=province_total_insc, y=DM_form_province, by="province_label_clean", all=T)
provinces_DM<- provinces_DM %>%
rename(all_insc=n)
provinces_DM <- provinces_DM %>%
mutate(dm = provinces_DM$`dis manibus`+provinces_DM$`diis manibus`) %>%
mutate(dms = provinces_DM$`dis manibus sacrum`+provinces_DM$`diis manibus sacrum`) %>%
mutate(dm_ratio = round(dm/(all_insc/100),2)) %>%
mutate(dms_ratio = round(dms/(all_insc/100),2))
provinces_DM
write_csv(provinces_DM, "../output/DM_form_provinces.csv")
#Filter those provinces where the ratio is higher than mean for all provinces fro a given formula
higher_dm<- provinces_DM %>%
filter(dm_ratio > mean(na.omit(provinces_DM$dm_ratio)))
higher_dm
higher_dms<- provinces_DM %>%
filter(dms_ratio > mean(na.omit(provinces_DM$dms_ratio)))
higher_dms
higher_dm_dms<- provinces_DM %>%
filter(dm_ratio > mean(na.omit(provinces_DM$dm_ratio)& dms_ratio > mean(na.omit(provinces_DM$dms_ratio))))
## Warning in na.omit(provinces_DM$dm_ratio) & dms_ratio >
## mean(na.omit(provinces_DM$dms_ratio)): longer object length is not a multiple of
## shorter object length
higher_dm_dms
# preparation of subsets
# dis manibus formula
EDH_dis_manibus <- EDH_DM %>%
filter(form_dis_manibus == "dis manibus")
# diis manibus formula
EDH_diis_manibus <- EDH_DM %>%
filter(form_dis_manibus == "diis manibus")
# dis manibus sacrum formula
EDH_dis_manibus_sacrum <- EDH_DM %>%
filter(form_dis_manibus == "dis manibus sacrum")
# diis manibus sacrum formula
EDH_diis_manibus_sacrum <- EDH_DM %>%
filter(form_dis_manibus == "diis manibus sacrum")
DM_map<- #head(EDH_DM, 100) %>%
leaflet(EDH_DM) %>%
leaflet(width="100%") %>%
#addProviderTiles("Stamen.Watercolor")%>% # Add CartoDB map tiles
addProviderTiles("Stamen.TerrainBackground")%>% # Add CartoDB map tiles
#addProviderTiles("Esri.WorldTopoMap", group = "Topo") %>%
#addProviderTiles("Esri.WorldImagery", group = "ESRI Aerial") %>%
setView( lng = 12.9239625, lat = 41.9515694, zoom = 3.5 ) %>%
#setMaxBounds(lat1=43.633977, lng1 =-11.227926 , lat2=35.133882 , lng2=50.882336) %>%
#addPolylines(data = roads, color = "purple", weight = 1, opacity = 0.7) %>%
addCircles(lng = EDH_dis_manibus$longitude,
lat = EDH_dis_manibus$latitude, opacity = 0.3, radius = 0.7, fill = T , color = "blue" , fillColor = "blue",) %>%
addCircles(lng = EDH_dis_manibus_sacrum$longitude,
lat = EDH_dis_manibus_sacrum$latitude, opacity = 0.3, radius = 0.7, fill = T , color = "red" , fillColor = "red",) %>%
addCircles(lng = EDH_diis_manibus$longitude,
lat = EDH_diis_manibus$latitude, opacity = 0.3, radius = 0.7, fill = T , color = "black" , fillColor = "black",) %>%
addCircles(lng = EDH_diis_manibus_sacrum$longitude,
lat = EDH_diis_manibus_sacrum$latitude, opacity = 0.3, radius = 0.7, fill = T , color = "orange" , fillColor = "orange",) %>%
#addAwesomeMarkers(~EDH$longitude, ~EDH$latitude, icon=icons) %>%
addLegend(position = "bottomright",
colors = c("Blue", "Green", "Red", "Orange"),
labels = c("dis manibus", "diis manibus", "dis manibus sacrum", "diis manibus sacrum"), opacity = 1,
title = "Dis Manibus formula (EDH)"
) %>%
addScaleBar(position="bottomleft")
## Warning in validateCoords(lng, lat, funcName): Data contains 2 rows with either
## missing or invalid lat/lon values and will be ignored
## Warning in validateCoords(lng, lat, funcName): Data contains 4 rows with either
## missing or invalid lat/lon values and will be ignored
## Warning in validateCoords(lng, lat, funcName): Data contains 185 rows with
## either missing or invalid lat/lon values and will be ignored
## Warning in validateCoords(lng, lat, funcName): Data contains 195 rows with
## either missing or invalid lat/lon values and will be ignored
DM_map
Preparation of the dataset - unique people records
EDH_small <- EDH_DM %>%
select(id, people, form_dis_manibus, type_of_inscription_clean, type_of_monument_clean)
EDH_smu <- EDH_small %>% unnest_wider(people)
names(EDH_smu)
## [1] "id" "name"
## [3] "cognomen" "nomen"
## [5] "person_id" "gender"
## [7] "praenomen" "age: years"
## [9] "status" "occupation"
## [11] "age: days" "age: months"
## [13] "origo" "tribus"
## [15] "age: hours" "supernomen"
## [17] "form_dis_manibus" "type_of_inscription_clean"
## [19] "type_of_monument_clean"
# this works only on Linux computer, not on Windows
EDH_people<- EDH_small %>% unnest_wider(people) %>%
unnest_longer(names(EDH_smu))
EDH_people<- EDH_people %>%
#filter(form_dis_manibus == "dis manibus" | form_dis_manibus == "dis manibus sacrum") %>%
mutate(gender_clean = str_replace_all(gender, "W\\?", "female")) %>%
mutate(gender_clean = str_replace_all(gender_clean, "M\\?", "male"))
EDH_people %>%
count(gender_clean, form_dis_manibus, sort=T) %>%
mutate(freq = n/(sum(n)/100)) %>%
ggplot(aes(y=gender_clean, x=n)) +
geom_col(aes(fill=form_dis_manibus)) +
#geom_label(aes(label = round(freq, 2)), hjust=0.3, size = 3.5) +
theme_linedraw(base_size = 12) +
labs(y="Gender", x="Formula", title= "Dis Manibus formula by gender", subtitle = ggtitle(paste("n =", length(EDH_DM$people), "inscriptions (EDH)")))
EDH_people %>%
ggplot(aes(x=gender_clean, fill=form_dis_manibus)) +
geom_bar(position="fill") +
#geom_label(aes(label = round(freq, 2)), hjust=0.3, size = 3.5) +
#theme_linedraw(base_size = 12) +
labs(y="Gender", x="Formula", title= "Ratio of Dis Manibus formula by gender", subtitle = ggtitle(paste("n =", length(EDH_DM$people), "inscriptions (EDH)")))
EDH_people %>%
#filter(form_dis_manibus == "dis manibus" | form_dis_manibus == "dis manibus sacrum") %>%
mutate(status_clean = str_replace_all(status, "\\?", "")) %>%
count(status_clean, form_dis_manibus, sort=T) %>%
ggplot(aes(y=status_clean, x=n)) +
geom_col(aes(fill=form_dis_manibus)) +
#coord_cartesian(xlim = c(0, 15800)) +
labs(y="Status", x="Individuals", title= "Dis Manibus formula by personal status", subtitle = ggtitle(paste("n =", nrow(EDH_people), "individuals (EDH)"))) +
theme(legend.position = c(0.9, 0.15)) +
scale_fill_discrete(name="Formula")
EDH_people<- EDH_people %>%
#filter(form_dis_manibus == "dis manibus" | form_dis_manibus == "dis manibus sacrum") %>%
mutate(status_clean = str_replace_all(status, "\\?", ""))
Status of a person was split into new column
status_highest that records only the highest status
category in the following order: 1) slaves, 2) freedmen / freedwomen, 3)
Augustales, 4) military personnel, 5) lower local offices,
administration of imperial estates, 6) decurial order, high local
offices, 7) equestrian order, 8) senatorial order, 9) rulers
(foreign)
EDH_people$status_highest <- ifelse(grepl("decurial order, higher local offices; military personnel", EDH_people$status_clean, ignore.case = T), "decurial order, higher local offices",
ifelse(grepl("equestrian order; decurial order, higher local offices", EDH_people$status_clean, ignore.case = T), "equestrian order",
ifelse(grepl("senatorial order; equestrian order", EDH_people$status_clean, ignore.case = T), "senatorial order",
ifelse(grepl("Augustales; freedmen / freedwomen", EDH_people$status_clean, ignore.case = T), "Augustales",
ifelse(grepl("decurial order, higher local offices; Augustales", EDH_people$status_clean, ignore.case = T), "decurial order, higher local offices",
ifelse(grepl("decurial order, higher local offices; freedmen / freedwomen", EDH_people$status_clean, ignore.case = T), "decurial order, higher local offices",
ifelse(grepl("equestrian order; lower local offices, administration of imperial estates", EDH_people$status_clean, ignore.case = T), "equestrian order",
EDH_people$status_clean)))))))
EDH_status_count <- EDH_people %>%
count(form_dis_manibus, status_clean, sort = T) %>%
mutate(ratio = round(n/(sum(n)/100),2))
mean_dms <- mean(EDH_status_count$ratio[EDH_status_count$form_dis_manibus == "dis manibus sacrum"])
EDH_people %>%
ggplot(aes(y=status_highest, fill=form_dis_manibus)) +
geom_bar(position="fill") +
#coord_cartesian(xlim = c(0, 15800)) +
labs(y="Status", x="Individuals", title= "Ratio of Dis Manibus formula by personal status", subtitle = ggtitle(paste("n =", nrow(EDH_people), "individuals (EDH)"))) +
#theme(legend.position = c(0.9, 0.15)) +
scale_fill_discrete(name="Formula") +
geom_vline(xintercept =mean_dms/10, linetype = "longdash", size=0.3)
EDH_people %>%
mutate(age_yr = as.numeric(`age: years`)) %>%
filter(!is.na(age_yr)) %>%
count(age_yr, form_dis_manibus, sort=T) %>%
ggplot(aes(age_yr, n, colour= form_dis_manibus)) +
geom_point(aes(colour=form_dis_manibus)) +
labs(y="Individuals", x="Years at death", title= "Dis Manibus formula by age in years of the deceased", subtitle = ggtitle(paste("n =", nrow(EDH_people), "individuals (EDH)")))+
geom_smooth(se = FALSE, method = lm) +
facet_wrap(~form_dis_manibus)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## `geom_smooth()` using formula 'y ~ x'
@continue here
average_age <- EDH_people %>%
mutate(age_yr = as.numeric(`age: years`)) %>%
filter(!is.na(age_yr)) %>%
select(age_yr)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
average_age_all<- mean(na.omit(average_age$age_yr))
average_age_per_formula <- EDH_people %>%
mutate(age_yr = as.numeric(`age: years`)) %>%
filter(!is.na(age_yr)) %>%
group_by(form_dis_manibus) %>%
summarise_at(vars(age_yr), list(name=mean)) %>%
mutate(diff_av_age = name-average_age_all)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
average_age_per_formula
Saving DM with people data locally:
EDH_DMpeoplejson <- toJSON(EDH_people, simplifyVector = TRUE)
write(EDH_DMpeoplejson, "../data/EDH_DM_people.json")
EDH_DM %>%
count(type_of_monument_clean, form_dis_manibus, sort = T) %>%
ggplot(aes(y=type_of_monument_clean, x=n)) +
geom_col(aes(fill=form_dis_manibus)) +
theme_minimal() +
labs(y="Inscribed object", x="Inscriptions", title= "Dis Manibus formula by inscribed object", subtitle = ggtitle(paste("n =", nrow(EDH_DM), "inscriptions (EDH)")))
EDH_DM %>%
count(material_clean, form_dis_manibus, sort = T) %>%
ggplot(aes(y=material_clean, x=n)) +
geom_col(aes(fill=form_dis_manibus)) +
theme_minimal() +
labs(y="Inscribed material", x="Inscriptions", title= "Dis Manibus formula by inscribed material", subtitle = ggtitle(paste("n =", nrow(EDH_DM), "inscriptions (EDH)")))
Aarhus University, Denmark, https://orcid.org/0000-0002-6349-0540↩︎